Random KFG Jittered Bin Sample:
    —  Pendant Pendant Sum Monte Carlo Simulation


1. Creating the distributions

1.1 The Existing FieldGuide Distribution

The first dataframe to build is a database of sums using the khipus in the existing KFG.

Code
import numpy as np
import random
from random import choices
import time
import pandas as pd
from pandas import Series, DataFrame

import khipu_kamayuq as kamayuq  # A Khipu Maker is known (in Quechua) as a Khipu Kamayuq
import khipu_qollqa as kq

# Plotly
import plotly
from plotly.offline import iplot, init_notebook_mode
import plotly.graph_objs as go
import plotly.express as px
import plotly.figure_factory as ff
plotly.offline.init_notebook_mode(connected = False)

from monte_carlo import DiscreteDistributionSampler, PendantSummer, StrawmanKhipu
Code
# Load the khipus and wrap each one as a StrawmanKhipu tagged with source "KFG",
# built from the knotted values of its pendant cords.
khipu_dict, all_khipus = kamayuq.fetch_khipus()

strawmen_kfg_khipu = []
for khipu in all_khipus:
    khipu_name = khipu.name()
    pendant_values = [cord.knotted_value() for cord in khipu.pendant_cords()]
    strawmen_kfg_khipu.append(StrawmanKhipu(khipu_name, "KFG", pendant_values))

# One dataframe row per strawman khipu; the column names come from StrawmanKhipu itself.
kfg_rows = [strawman.dataframe_tuple() for strawman in strawmen_kfg_khipu]
strawmen_kfg_df = pd.DataFrame(kfg_rows, columns=StrawmanKhipu.dataframe_columns())
strawmen_kfg_df.head()
name source num_pendants mean_cord_value stdev_cord_value num_right_sums num_left_sums num_sums mean_num_summands stdev_num_summands mean_sum_value stdev_sum_value num_sums_per_nonzero_pendant mean_right_handedness stdev_right_handedness mean_left_handedness stdev_left_handedness
0 AS010 KFG 27 8 10.392305 3 2 5 3.0 1.000000 22.0 9.695360 0.227273 4.333333 0.577350 -6.5 4.949747
1 AS011 KFG 15 92 183.904867 0 0 0 0.0 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.0 0.000000
2 AS012 KFG 85 2 5.196152 3 4 7 6.0 2.449490 18.0 7.348469 0.233333 10.333333 6.027714 -9.0 5.099020
3 AS013 KFG 90 4 14.456832 0 5 5 14.0 14.456832 48.0 44.508426 0.121951 0.000000 0.000000 -19.0 8.860023
4 AS014 KFG 42 53 40.137264 1 2 3 2.0 0.000000 99.0 7.000000 0.071429 17.000000 0.000000 -15.0 4.242641
Code
# Totals of right- and left-handed sums across the KFG strawman dataframe.
total_right_sums = sum(strawmen_kfg_df["num_right_sums"].tolist())
total_left_sums = sum(strawmen_kfg_df["num_left_sums"].tolist())
total_sums = total_right_sums + total_left_sums

# Whole-number percentage split; both fall back to 0 when there are no sums at all.
if total_sums > 0:
    left_pct = round(100.0*float(total_left_sums)/float(total_sums))
    right_pct = round(100.0*float(total_right_sums)/float(total_sums))
else:
    left_pct = 0
    right_pct = 0

# Mean and standard deviation (1 decimal place) of the per-khipu mean handedness columns.
left_handed_mean = round(strawmen_kfg_df["mean_left_handedness"].mean(), 1)
right_handed_mean = round(strawmen_kfg_df["mean_right_handedness"].mean(), 1)
left_handed_stdev = round(strawmen_kfg_df["mean_left_handedness"].std(), 1)
right_handed_stdev = round(strawmen_kfg_df["mean_right_handedness"].std(), 1)

# NOTE: the `=` specifier makes the left total print as "total_left_sums=<value>".
print(f"Existing KFG - Right/Left Distribution = {right_pct}%/{left_pct}% ({total_right_sums}/{total_left_sums=})")
print(f"             - Right/Left Mean Handedness = {right_handed_mean}/{left_handed_mean} ±({right_handed_stdev}/{left_handed_stdev})")
Existing KFG - Right/Left Distribution = 54%/46% (4354/total_left_sums=3734)
             - Right/Left Mean Handedness = 9.9/-8.5 ±(15.0/14.2)

1.2 Strawman Khipus based on a Jittered Bin Distribution based on the KFG Values

It could be argued that a uniform distribution does not resemble the actual khipu distribution, which has many low-valued cords.

Accordingly, let’s use a jittered_bin distribution based on a probability density of the discrete samples from the Khipu Field Guide. The distribution should have:

  1. The same number of khipus as the KFG
  2. A pendant cord count chosen randomly from the existing khipus’ pendant cord counts
  3. Pendant values that are randomly generated from a jittered bin distribution based on a discrete distribution of the KFG cord values. For more information see - Discrete Distribution Sampler
Code
# Pendant-cord count of every khipu in all_khipus; the population that
# sample_kfg_num_cords() draws from when sizing a strawman khipu.
cords_per_khipu = [aKhipu.num_pendant_cords() for aKhipu in all_khipus]
def sample_kfg_num_cords(cord_counts=None, min_cords=3):
    """Draw a pendant-cord count for one strawman khipu.

    One value is picked uniformly at random (with `random.choices`) from the
    population of cord counts, then raised to `min_cords` if it is smaller, so
    that trivial khipus are mutated into minimally sized ones.

    Args:
        cord_counts: Sequence of cord counts to sample from. When None (the
            default) the module-level `cords_per_khipu` list is used.
        min_cords: Smallest cord count that may be returned. Defaults to 3.

    Returns:
        The sampled cord count, never less than `min_cords`.

    Raises:
        IndexError: If the population to sample from is empty.
    """
    population = cords_per_khipu if cord_counts is None else cord_counts
    return max(min_cords, choices(population, k=1)[0])

# Number of random strawman khipus to produce: one per khipu in all_khipus,
# so the random set has as many khipus as the existing set.
num_dummy_khipus = len(all_khipus)

def kfg_cord_distribution_sampler():
    """Return a DiscreteDistributionSampler built from the KFG pendant cord values.

    Gathers the knotted value of every pendant cord of every khipu in
    `all_khipus`, keeping only values greater than zero, and hands the
    resulting flat list to DiscreteDistributionSampler.
    """
    positive_values = [
        cord.knotted_value()
        for khipu in all_khipus
        for cord in khipu.pendant_cords()
        if cord.knotted_value() > 0
    ]
    return DiscreteDistributionSampler(positive_values)
    
sampler = kfg_cord_distribution_sampler()

# One zero-padded name per strawman khipu to generate.
random_names = [f"rjittered_bin_khipu_{i:05d}" for i in range(num_dummy_khipus)]

# Each strawman gets a sampled cord count and that many jittered-bin values,
# rounded to whole numbers before being handed to StrawmanKhipu.
strawmen_rjittered_bin_khipu = []
for khipu_name in random_names:
    num_cords = sample_kfg_num_cords()
    random_cords = [round(value) for value in sampler.jittered_bin_sample(num_cords)]
    strawmen_rjittered_bin_khipu.append(StrawmanKhipu(khipu_name, "rjittered_bin", random_cords))

# One dataframe row per strawman khipu, same columns as the KFG dataframe.
rjittered_bin_rows = [strawman.dataframe_tuple() for strawman in strawmen_rjittered_bin_khipu]
strawmen_rjittered_bin_df = pd.DataFrame(rjittered_bin_rows, columns=StrawmanKhipu.dataframe_columns())
strawmen_rjittered_bin_df.head()
name source num_pendants mean_cord_value stdev_cord_value num_right_sums num_left_sums num_sums mean_num_summands stdev_num_summands mean_sum_value stdev_sum_value num_sums_per_nonzero_pendant mean_right_handedness stdev_right_handedness mean_left_handedness stdev_left_handedness
0 rjittered_bin_khipu_00000 rjittered_bin 36 29.277778 41.427811 8 6 14 3.0 1.732051 43.857143 34.357526 0.388889 8.250000 4.062019 -8.333333 5.785038
1 rjittered_bin_khipu_00001 rjittered_bin 5 8.600000 6.985700 0 0 0 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
2 rjittered_bin_khipu_00002 rjittered_bin 20 272.650000 431.341759 3 1 4 2.0 0.000000 18.500000 4.358899 0.200000 3.333333 1.527525 -2.000000 0.000000
3 rjittered_bin_khipu_00003 rjittered_bin 49 68.877551 107.736104 7 7 14 4.0 2.236068 130.142857 155.251090 0.285714 19.000000 13.904436 -7.285714 7.040698
4 rjittered_bin_khipu_00004 rjittered_bin 21 150.904762 324.194371 0 1 1 3.0 0.000000 28.000000 0.000000 0.047619 0.000000 0.000000 -11.000000 0.000000
Code
# Totals of right- and left-handed sums across the random jittered-bin strawmen.
total_right_sums = sum(strawmen_rjittered_bin_df["num_right_sums"].tolist())
total_left_sums = sum(strawmen_rjittered_bin_df["num_left_sums"].tolist())
total_sums = total_right_sums + total_left_sums
print(f"{total_right_sums=} {total_left_sums=}")

# Whole-number percentage split; both fall back to 0 when there are no sums at all.
if total_sums > 0:
    left_pct = round(100.0*float(total_left_sums)/float(total_sums))
    right_pct = round(100.0*float(total_right_sums)/float(total_sums))
else:
    left_pct = 0
    right_pct = 0

# Mean and standard deviation (1 decimal place) of the per-khipu mean handedness columns.
left_handed_mean = round(strawmen_rjittered_bin_df["mean_left_handedness"].mean(), 1)
right_handed_mean = round(strawmen_rjittered_bin_df["mean_right_handedness"].mean(), 1)
left_handed_stdev = round(strawmen_rjittered_bin_df["mean_left_handedness"].std(), 1)
right_handed_stdev = round(strawmen_rjittered_bin_df["mean_right_handedness"].std(), 1)

# NOTE: the `=` specifier makes the left total print as "total_left_sums=<value>".
print(f"Random Jittered Bin - Right/Left Distribution = {right_pct}%/{left_pct}% ({total_right_sums}/{total_left_sums=})")
print(f"                  - Right/Left Mean Handedness = {right_handed_mean}/{left_handed_mean} ±({right_handed_stdev}/{left_handed_stdev})")

strawmen_rjittered_bin_df.describe()
total_right_sums=9869 total_left_sums=9779
Random Jittered Bin - Right/Left Distribution = 50%/50% (9869/total_left_sums=9779)
                  - Right/Left Mean Handedness = 13.3/-12.7 ±(12.8/12.8)
num_pendants mean_cord_value stdev_cord_value num_right_sums num_left_sums num_sums mean_num_summands stdev_num_summands mean_sum_value stdev_sum_value num_sums_per_nonzero_pendant mean_right_handedness stdev_right_handedness mean_left_handedness stdev_left_handedness
count 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000
mean 63.515385 282.958869 1068.862498 15.183077 15.044615 30.227692 2.947692 2.131828 92.664421 194.303311 0.241734 13.265380 9.573296 -12.738037 9.491821
std 86.719713 785.393759 2491.118041 33.209639 33.053715 66.121454 1.815963 2.443491 107.429548 372.856423 0.209714 12.836147 12.396232 12.839228 12.546433
min 3.000000 4.750000 2.872281 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -65.391892 0.000000
25% 16.000000 86.227083 192.500711 0.000000 0.000000 1.000000 2.000000 0.000000 19.125000 0.000000 0.066667 0.000000 0.000000 -19.986842 0.000000
50% 33.000000 167.729497 466.127534 3.000000 3.000000 6.000000 3.000000 1.414214 60.030303 48.024146 0.200000 10.414286 5.576730 -9.875000 5.300662
75% 76.000000 284.649522 1070.312141 14.000000 14.000000 28.750000 4.000000 3.464102 135.172464 231.001191 0.378788 20.541667 14.566347 0.000000 14.553012
max 864.000000 16565.333333 40556.039337 385.000000 373.000000 758.000000 9.000000 12.922848 1055.000000 3763.154765 0.877315 60.072727 76.186790 0.000000 74.303955

We see the handedness expands less than the random uniform, although the standard deviation is still quite high. The number of sums has increased (from random uniform) slightly to 0.042 sums per pendant, 1/4th of the existing KFG.

2. Random Jittered Bin vs. Existing KFG - Graphical Distribution

To graphically compare the distributions of the random khipus with existing khipus, one dataframe is needed:

Code
def source_color(x):
    """Return the colour-scale position for a source label: 0.0 for "KFG", 1.0 for anything else."""
    if x == "KFG":
        return 0.0
    return 1.0

# Stack the KFG rows on top of the random jittered-bin rows (row-wise concat),
# then add a numeric colour value per row derived from its source label.
frames_to_stack = [strawmen_kfg_df, strawmen_rjittered_bin_df]
combined_kfg_rjittered_bin_df = pd.concat(frames_to_stack, axis=0)
combined_kfg_rjittered_bin_df['source_color'] = list(
    map(source_color, combined_kfg_rjittered_bin_df['source'].values)
)
Code
# Left/right sum totals for the existing KFG khipus.
kfg_left = sum(strawmen_kfg_df["num_left_sums"].tolist())
kfg_right = sum(strawmen_kfg_df["num_right_sums"].tolist())
if (kfg_left + kfg_right) > 0:
    pct_kfg_left = round(100.0*float(kfg_left)/float(kfg_left+kfg_right))
    pct_kfg_right = round(100.0*float(kfg_right)/float(kfg_left+kfg_right))
else:
    # No sums at all: report 0% on both sides rather than dividing by zero.
    pct_kfg_left = 0
    pct_kfg_right = 0

# Left/right sum totals for the random jittered-bin khipus.
rjittered_bin_left = sum(strawmen_rjittered_bin_df["num_left_sums"].tolist())
rjittered_bin_right = sum(strawmen_rjittered_bin_df["num_right_sums"].tolist())
all_rjittered_bin = rjittered_bin_left + rjittered_bin_right
if all_rjittered_bin > 0:
    pct_rjittered_bin_left = round(100.0*float(rjittered_bin_left)/float(all_rjittered_bin))
    pct_rjittered_bin_right = round(100.0*float(rjittered_bin_right)/float(all_rjittered_bin))
else:
    pct_rjittered_bin_left = 0
    pct_rjittered_bin_right = 0

print(f"Num Right/Left Sums for Existing KFG:{kfg_right}/{kfg_left} ({pct_kfg_right}%/{pct_kfg_left}%)")
print(f"Num Right/Left Sums for Random Jittered Bin: {rjittered_bin_right}/{rjittered_bin_left} ({pct_rjittered_bin_right}%/{pct_rjittered_bin_left}%)")
Num Right/Left Sums for Existing KFG:4354/3734 (54%/46%)
Num Right/Left Sums for Random Jittered Bin: 9869/9779 (50%/50%)
Code
# Log-log scatter of #right sums (x) against #left sums (y) for every khipu in the
# combined dataframe; marker size is the mean number of summands and colour is
# driven by the numeric source_color column.
legend_text = "<b>Random Jittered Bin vs KFG - #Sums:</b><i style=\"font-size:10pt;\"> Blue-KFG, Red-Random, Size-#Summands</i>"
fig = px.scatter(
    combined_kfg_rjittered_bin_df,
    x="num_right_sums", y="num_left_sums",
    log_x=True, log_y=True,
    size="mean_num_summands",
    opacity=.4,
    color='source_color', color_continuous_scale=['#3c3fff', '#ff3030'],
    labels={"name": "Khipu Name"},
    hover_data=['name'], title=legend_text,
    width=944, height=944,
)
# Hide both the legend and the continuous colour bar.
fig.update_layout(showlegend=False)
fig.update(layout_coloraxis_showscale=False)
# show() is called as its own statement so `fig` keeps the Figure; chaining
# .show() onto the assignment would bind `fig` to show()'s return value (None).
fig.show()

As expected, small random sums occur more often, and they also have fewer summands. Let’s compare the number of summands for the random khipus vs. the existing khipus.

Code
# Scatter of mean #summands (x, linear) against #sums (y, log) for every khipu in
# the combined dataframe; marker size is #sums per non-zero pendant and colour is
# driven by the numeric source_color column.
# The title previously ended in "Size-#Sums/Pendan"; the truncated word is completed.
legend_text = "<b>Random Jittered Bin vs KFG - #Sums vs #Summands:</b><i style=\"font-size:10pt;\"> Blue-KFG, Red-Random, Size-#Sums/Pendant</i>"
fig = px.scatter(
    combined_kfg_rjittered_bin_df,
    x="mean_num_summands", y="num_sums",
    log_y=True,
    size="num_sums_per_nonzero_pendant",
    opacity=.4,
    color='source_color', color_continuous_scale=['#3c3fff', '#ff3030'],
    labels={"name": "Khipu Name"},
    hover_data=['name'], title=legend_text,
    width=944, height=944,
)
# Hide both the legend and the continuous colour bar.
fig.update_layout(showlegend=False)
fig.update(layout_coloraxis_showscale=False)
# show() is called as its own statement so `fig` keeps the Figure; chaining
# .show() onto the assignment would bind `fig` to show()'s return value (None).
fig.show()

This echoes the previous statement about the number of summands being very different in the Random jittered_bin set! A relatively clear separation occurs.

Code
# Scatter of mean left handedness (x) against mean right handedness (y) for every
# khipu in the combined dataframe; marker size is #sums per non-zero pendant and
# colour is driven by the numeric source_color column.
legend_text = "<b>Random Jittered Bin vs KFG - Sum Handedness:</b><i style=\"font-size:10pt;\"> Blue-KFG, Red-Random, Size-#Sums/Pendant</i>"
fig = px.scatter(
    combined_kfg_rjittered_bin_df,
    x="mean_left_handedness", y="mean_right_handedness",
    size="num_sums_per_nonzero_pendant",
    opacity=0.3,
    color='source_color', color_continuous_scale=['#3c3fff', '#ff3030'],
    labels={"name": "Khipu Name"},
    hover_data=['name'], title=legend_text,
    width=944, height=944,
)
# Hide both the legend and the continuous colour bar.
fig.update_layout(showlegend=False)
fig.update(layout_coloraxis_showscale=False)
# show() is called as its own statement so `fig` keeps the Figure; chaining
# .show() onto the assignment would bind `fig` to show()'s return value (None).
fig.show()

Now we’re getting somewhere. Existing KFG Khipus have their sums close, for obvious reasons. However the randomly generated khipus have many more far sums, with a small number of summands.

Code
# Scatter of #sums per non-zero pendant (x, linear) against mean sum value (y, log)
# for every khipu in the combined dataframe; marker size is the mean number of
# summands and colour is driven by the numeric source_color column.
legend_text = "<b>Random Jittered Bin vs KFG - Mean Sum vs #Sums/Pendant:</b><i style=\"font-size:10pt;\"> Blue-KFG, Red-Random, Size-#Summands</i>"
fig = px.scatter(
    combined_kfg_rjittered_bin_df,
    x="num_sums_per_nonzero_pendant", y="mean_sum_value",
    log_y=True,
    size="mean_num_summands",
    opacity=0.5,
    color='source_color', color_continuous_scale=['#3c3fff', '#ff3030'],
    labels={"name": "Khipu Name"},
    hover_data=['name', 'num_sums', 'mean_sum_value'], title=legend_text,
    width=944, height=944,
)
# Hide both the legend and the continuous colour bar.
fig.update_layout(showlegend=False)
fig.update(layout_coloraxis_showscale=False)
# show() is called as its own statement so `fig` keeps the Figure; chaining
# .show() onto the assignment would bind `fig` to show()'s return value (None).
fig.show()

3. Frequency Distributions

An examination of frequency distributions for key variables, using violin plots, where width=frequency and height=variable being measured.

3.1 Handedness Frequency

Code
# Per-khipu handedness bias: |#right sums| - |#left sums|.
combined_kfg_rjittered_bin_df['handedness_bias'] = [
    abs(num_right) - abs(num_left)
    for num_right, num_left in zip(
        combined_kfg_rjittered_bin_df['num_right_sums'].values.tolist(),
        combined_kfg_rjittered_bin_df['num_left_sums'].values.tolist(),
    )
]
# Relabel the source column in place for display: every non-KFG row becomes
# "Random jittered_bin". This overwrites the original source labels.
combined_kfg_rjittered_bin_df['source'] = [
    "KFG" if source == 'KFG' else "Random jittered_bin"
    for source in combined_kfg_rjittered_bin_df.source.values.tolist()
]
legend_text = "<b>Random Jittered Bin vs KFG - Handedness Bias (#RightHandedSums - #LeftHandedSums)</b>"
fig = px.violin(
    combined_kfg_rjittered_bin_df,
    y="handedness_bias",
    points='all', color="source",
    hover_data=['name', 'num_sums'], title=legend_text,
    width=944, height=944,
)
# show() is called as its own statement so `fig` keeps the Figure; chaining
# .show() onto the assignment would bind `fig` to show()'s return value (None).
fig.show()

As another view of handedness, let’s create 1000 sets of 650 sample khipus and compare their overall handedness with that of the KFG.

Code
# Compute Expensive - takes roughly 8 minutes per 100 sets
def make_handedness_sample(set_index, sampler):
    """Generate one full set of random strawman khipus and total its sums by hand.

    Builds `num_dummy_khipus` StrawmanKhipu objects (source "rjittered_bin"),
    each with a sampled cord count and that many rounded jittered-bin values
    drawn from `sampler`, then adds up the right- and left-handed sum counts
    over the whole set.

    Args:
        set_index: Index of this set; embedded (zero-padded) in each khipu name.
        sampler: Object providing `jittered_bin_sample(n)`.

    Returns:
        Tuple `(total_right_sums, total_left_sums)` for the set.
    """
    strawmen = []
    for khipu_index in range(num_dummy_khipus):
        khipu_name = f"rjittered_bin_khipu_{set_index:05d}_{khipu_index:05d}"
        num_cords = sample_kfg_num_cords()
        cord_values = [round(value) for value in sampler.jittered_bin_sample(num_cords)]
        strawmen.append(StrawmanKhipu(khipu_name, "rjittered_bin", cord_values))

    rows = [strawman.dataframe_tuple() for strawman in strawmen]
    sample_df = pd.DataFrame(rows, columns=StrawmanKhipu.dataframe_columns())

    right_total = sum(sample_df["num_right_sums"].tolist())
    left_total = sum(sample_df["num_left_sums"].tolist())
    return (right_total, left_total)

def run_handedness_monte_carlo_experiment():
    """Run `num_sets` handedness samples and save the totals to a CSV file.

    For each set index, calls make_handedness_sample() with the module-level
    `sampler` and records (right sums, left sums, right - left). A compact
    progress ticker is printed while the loop runs. The results are written to
    "./CSV/handedness_set.csv" (the directory is created if it is missing) and
    also returned.

    Note that the CSV is overwritten on every call, including a call with
    `num_sets` of 0, which writes a header-only file.

    Returns:
        DataFrame with columns 'num_right_sums', 'num_left_sums', 'handedness'.
    """
    from pathlib import Path

    handedness_set = []
    for i in range(num_sets):
        (right_sums, left_sums) = make_handedness_sample(i, sampler)
        handedness_set.append((right_sums, left_sums, right_sums - left_sums))

        # Progress ticker: "." per set, the tens digit every 10th set, and
        # "(<index>)" plus a new line every 100th set.
        if i == 0:
            marker = "\n0"
        elif i % 100 == 0:
            marker = f"({i})\n0"
        elif i % 10 == 0:
            marker = f"{int(i/10)%10}"
        else:
            marker = "."
        print(marker, end="")

    # Close the ticker with the last index. Guarded because the loop variable
    # would be unbound (NameError) if num_sets were 0.
    if num_sets > 0:
        print(f"({num_sets - 1})\n", end="")

    handedness_set_df = pd.DataFrame(handedness_set, columns=['num_right_sums', 'num_left_sums', 'handedness'])

    # Make sure the output directory exists so a long run is not lost at save time.
    csv_path = "./CSV/handedness_set.csv"
    Path(csv_path).parent.mkdir(parents=True, exist_ok=True)
    handedness_set_df.to_csv(csv_path, index=False)
    return handedness_set_df

# Fresh sampler over the KFG cord values, used by the experiment below.
sampler = kfg_cord_distribution_sampler()

# Number of sets the experiment generates when it is run.
num_sets = 1000

# Leave False to reuse the saved CSV; set True to regenerate it first.
run_experiment = False
if run_experiment:
    run_handedness_monte_carlo_experiment()

# The results are always loaded from the CSV file, whether or not the experiment just ran.
handedness_set_df = pd.read_csv("./CSV/handedness_set.csv")
Code
# Violin plot of the per-set handedness delta from handedness_set_df, with the
# KFG's own delta drawn as a red horizontal line for comparison.
legend_text = "<b>Handedness Δ By Set</b><i style=\"font-size:.8em;\">- Red=KFG Handedness, Blue=Random Jittered Bin Set Samples</i>"
kfg_handedness = 620 #Num Right/Left Sums for Existing KFG:4354/3734 (54%/46%) - Handedness Delta: 620
fig = px.violin(
    handedness_set_df,
    y="handedness",
    points='all',
    hover_data=['num_right_sums', 'num_left_sums', 'handedness'],
    title=legend_text,
    width=944, height=944,
)
fig.add_hline(y=kfg_handedness, line_width=3, line_color="red")
fig.add_hline(y=800, line_width=.5, line_color="white") #gives graph some breathing room
# show() is called as its own statement so `fig` keeps the Figure; chaining
# .show() onto the assignment would bind `fig` to show()'s return value (None).
fig.show()

3.2 Sum Means

Code
# math.log is used below, so `math` is imported here to keep this cell runnable on its own.
import math

# Violin plot of the natural log of each khipu's mean sum value, split by source.
legend_text = "<b>Violin Plot - Random Jittered Bin vs KFG - Log(Sum Mean)</b>"
# Non-positive means (e.g. khipus with no sums) map to 0 instead of a math domain error.
combined_kfg_rjittered_bin_df['log_mean_sum'] = [
    math.log(x) if x > 0 else 0
    for x in combined_kfg_rjittered_bin_df['mean_sum_value'].values.tolist()
]
fig = px.violin(
    combined_kfg_rjittered_bin_df,
    y="log_mean_sum",
    points='all', color="source",
    labels={"log_mean_sum": "Log(Sum Mean)"},
    hover_data=['name', 'num_sums', 'mean_sum_value'], title=legend_text,
    width=944, height=944,
)
# show() is called as its own statement so `fig` keeps the Figure; chaining
# .show() onto the assignment would bind `fig` to show()'s return value (None).
fig.show()

3.3 Number of Sums per Pendant

Code
# Violin plot of #sums per non-zero pendant for each khipu, split by source.
legend_text = "<b>Violin Plot - Random Jittered Bin vs KFG - #Sums per Pendant</b>"
fig = px.violin(
    combined_kfg_rjittered_bin_df,
    y="num_sums_per_nonzero_pendant",
    points='all', color="source",
    labels={"num_sums_per_nonzero_pendant": "#Sums per Pendant"},
    hover_data=['name', 'num_sums', 'mean_sum_value'], title=legend_text,
    width=944, height=944,
)
# show() is called as its own statement so `fig` keeps the Figure; chaining
# .show() onto the assignment would bind `fig` to show()'s return value (None).
fig.show()

3.4 Number of Summands per Sum Pendant

Code
# Violin plot of the mean number of summands per sum for each khipu, split by source.
legend_text = "<b>Violin Plot - Random Jittered Bin vs KFG - #Summands per Sum</b>"
fig = px.violin(
    combined_kfg_rjittered_bin_df,
    y="mean_num_summands",
    points='all', color="source",
    labels={"mean_num_summands": "#Summands per Sum"},
    hover_data=['name', 'num_sums', 'mean_sum_value'], title=legend_text,
    width=944, height=944,
)
# show() is called as its own statement so `fig` keeps the Figure; chaining
# .show() onto the assignment would bind `fig` to show()'s return value (None).
fig.show()

This is also as you would expect - khipus that are randomly generated tend to have large sum values, few summands, and few sums per pendant cord.